# Load the Australian weather dataset (weatherAUS) from a local CSV file.
import pandas as pd
A = pd.read_csv("C:/Users/cheshi/Desktop/NEW PROJECTS FOR DELOITTE/faces with no emoji/weatherAUS (1).csv")
# Display the raw dataframe (notebook cell output).
A
| Date | Location | MinTemp | MaxTemp | Rainfall | Evaporation | Sunshine | WindGustDir | WindGustSpeed | WindDir9am | ... | Humidity3pm | Pressure9am | Pressure3pm | Cloud9am | Cloud3pm | Temp9am | Temp3pm | RainToday | RISK_MM | RainTomorrow | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2008-12-01 | Albury | 13.4 | 22.9 | 0.6 | NaN | NaN | W | 44.0 | W | ... | 22.0 | 1007.7 | 1007.1 | 8.0 | NaN | 16.9 | 21.8 | No | 0.0 | No |
| 1 | 2008-12-02 | Albury | 7.4 | 25.1 | 0.0 | NaN | NaN | WNW | 44.0 | NNW | ... | 25.0 | 1010.6 | 1007.8 | NaN | NaN | 17.2 | 24.3 | No | 0.0 | No |
| 2 | 2008-12-03 | Albury | 12.9 | 25.7 | 0.0 | NaN | NaN | WSW | 46.0 | W | ... | 30.0 | 1007.6 | 1008.7 | NaN | 2.0 | 21.0 | 23.2 | No | 0.0 | No |
| 3 | 2008-12-04 | Albury | 9.2 | 28.0 | 0.0 | NaN | NaN | NE | 24.0 | SE | ... | 16.0 | 1017.6 | 1012.8 | NaN | NaN | 18.1 | 26.5 | No | 1.0 | No |
| 4 | 2008-12-05 | Albury | 17.5 | 32.3 | 1.0 | NaN | NaN | W | 41.0 | ENE | ... | 33.0 | 1010.8 | 1006.0 | 7.0 | 8.0 | 17.8 | 29.7 | No | 0.2 | No |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 142188 | 2017-06-20 | Uluru | 3.5 | 21.8 | 0.0 | NaN | NaN | E | 31.0 | ESE | ... | 27.0 | 1024.7 | 1021.2 | NaN | NaN | 9.4 | 20.9 | No | 0.0 | No |
| 142189 | 2017-06-21 | Uluru | 2.8 | 23.4 | 0.0 | NaN | NaN | E | 31.0 | SE | ... | 24.0 | 1024.6 | 1020.3 | NaN | NaN | 10.1 | 22.4 | No | 0.0 | No |
| 142190 | 2017-06-22 | Uluru | 3.6 | 25.3 | 0.0 | NaN | NaN | NNW | 22.0 | SE | ... | 21.0 | 1023.5 | 1019.1 | NaN | NaN | 10.9 | 24.5 | No | 0.0 | No |
| 142191 | 2017-06-23 | Uluru | 5.4 | 26.9 | 0.0 | NaN | NaN | N | 37.0 | SE | ... | 24.0 | 1021.0 | 1016.8 | NaN | NaN | 12.5 | 26.1 | No | 0.0 | No |
| 142192 | 2017-06-24 | Uluru | 7.8 | 27.0 | 0.0 | NaN | NaN | SE | 28.0 | SSE | ... | 24.0 | 1019.4 | 1016.5 | 3.0 | 2.0 | 15.1 | 26.0 | No | 0.0 | No |
142193 rows × 24 columns
# Count missing values per column to decide on an imputation strategy.
A.isna().sum()
Date 0 Location 0 MinTemp 637 MaxTemp 322 Rainfall 1406 Evaporation 60843 Sunshine 67816 WindGustDir 9330 WindGustSpeed 9270 WindDir9am 10013 WindDir3pm 3778 WindSpeed9am 1348 WindSpeed3pm 2630 Humidity9am 1774 Humidity3pm 3610 Pressure9am 14014 Pressure3pm 13981 Cloud9am 53657 Cloud3pm 57094 Temp9am 904 Temp3pm 2726 RainToday 1406 RISK_MM 0 RainTomorrow 0 dtype: int64
# Simple imputation: fill missing values with the column mode for
# object-typed (categorical) columns and the column mean otherwise.
for col in A.columns:
    if A[col].dtypes == "object":
        fill_value = A[col].mode()[0]
    else:
        fill_value = A[col].mean()
    A[col] = A[col].fillna(fill_value)
# Z-score outlier detection: standardise the numeric columns, then flag
# every row index whose value in any column lies beyond +/- 3 standard
# deviations from the mean.
con = [col for col in A.columns if A[col].dtypes != "object"]
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()
A1 = pd.DataFrame(ss.fit_transform(A[con]), columns=con)
out = []
for col in con:
    out.extend(A1.index[(A1[col] > 3) | (A1[col] < -3)])
from numpy import unique
# Deduplicate (a row can exceed the threshold in several columns).
outliers = unique(out)
outliers
array([ 8, 12, 51, ..., 142014, 142126, 142127])
# Remove the flagged outlier rows from the dataframe.
A = A.drop(index=outliers)
A.shape
(131761, 24)
# Rebuild a contiguous 0..n-1 index after dropping rows. The original
# hard-coded the row count (range(0, 131761)), which raises if the input
# data — and therefore the number of dropped outliers — ever changes.
A = A.reset_index(drop=True)
# Split columns by dtype: one-hot encode the categoricals, standardise the
# numeric columns, then join both parts into one feature frame.
cat = [col for col in A.columns if A[col].dtypes == "object"]
con = [col for col in A.columns if A[col].dtypes != "object"]
A1 = pd.get_dummies(A[cat])
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()
A2 = pd.DataFrame(ss.fit_transform(A[con]), columns=con)
Xnew = A2.join(A1)
pip install statsmodels
Requirement already satisfied: statsmodels in d:\users\cheshi\anaconda4\lib\site-packages (0.14.0) Requirement already satisfied: scipy!=1.9.2,>=1.4 in d:\users\cheshi\anaconda4\lib\site-packages (from statsmodels) (1.11.1) Requirement already satisfied: pandas>=1.0 in d:\users\cheshi\anaconda4\lib\site-packages (from statsmodels) (2.0.3) Requirement already satisfied: numpy>=1.18 in d:\users\cheshi\anaconda4\lib\site-packages (from statsmodels) (1.25.0) Requirement already satisfied: patsy>=0.5.2 in d:\users\cheshi\anaconda4\lib\site-packages (from statsmodels) (0.5.3) Requirement already satisfied: packaging>=21.3 in d:\users\cheshi\anaconda4\lib\site-packages (from statsmodels) (23.0) Requirement already satisfied: python-dateutil>=2.8.2 in d:\users\cheshi\anaconda4\lib\site-packages (from pandas>=1.0->statsmodels) (2.8.2) Requirement already satisfied: tzdata>=2022.1 in d:\users\cheshi\anaconda4\lib\site-packages (from pandas>=1.0->statsmodels) (2023.3) Requirement already satisfied: pytz>=2020.1 in d:\users\cheshi\anaconda4\lib\site-packages (from pandas>=1.0->statsmodels) (2022.7) Requirement already satisfied: six in d:\users\cheshi\anaconda4\lib\site-packages (from patsy>=0.5.2->statsmodels) (1.16.0) Note: you may need to restart the kernel to use updated packages.
# Encode the Yes/No rain indicators as 1/0. Assign the result back instead
# of calling replace(..., inplace=True) on a column selection: that pattern
# is chained assignment, deprecated in pandas 2.x and not guaranteed to
# update the parent frame.
A['RainToday'] = A['RainToday'].replace({'No': 0, 'Yes': 1})
A['RainTomorrow'] = A['RainTomorrow'].replace({'No': 0, 'Yes': 1})
# Visualise the class balance of the target before any resampling.
import matplotlib.pyplot as plt
fig = plt.figure(figsize = (8,5))
A.RainTomorrow.value_counts(normalize = True).plot(kind='bar', color= ['skyblue','navy'], alpha = 0.9, rot=0)
plt.title('RainTomorrow Indicator No(0) and Yes(1) in the Imbalanced Dataset')
plt.show()
# Balance the classes: resample the minority ("rain tomorrow") rows with
# replacement until they match the majority class size, then stack the two.
from sklearn.utils import resample
majority = A[A.RainTomorrow == 0]
minority = A[A.RainTomorrow == 1]
minority_upsampled = resample(minority, replace=True, n_samples=len(majority), random_state=123)
oversampled = pd.concat([majority, minority_upsampled])
# Re-plot the target distribution to confirm the dataset is now balanced.
fig = plt.figure(figsize = (8,5))
oversampled.RainTomorrow.value_counts(normalize = True).plot(kind='bar', color= ['skyblue','navy'], alpha = 0.9, rot=0)
plt.title('RainTomorrow Indicator No(0) and Yes(1) after Oversampling (Balanced Dataset)')
plt.show()
# Missing Data Pattern in Training Data
import seaborn as sns
# Heatmap of the null mask: coloured cells mark missing entries per column.
sns.heatmap(oversampled.isnull(), cbar=False, cmap='PuBu')
<Axes: >
# Tabulate missing-value counts and ratios per column, worst offenders first.
null_counts = oversampled.isnull().sum()
total = null_counts.sort_values(ascending=False)
percent = (null_counts / oversampled.isnull().count()).sort_values(ascending=False)
missing = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing.head(4)
| Total | Percent | |
|---|---|---|
| Date | 0 | 0.0 |
| Location | 0 | 0.0 |
| RISK_MM | 0 | 0.0 |
| RainToday | 0 | 0.0 |
# List the columns that are still object-typed (candidates for encoding).
oversampled.select_dtypes(include=['object']).columns
Index(['Date', 'Location', 'WindGustDir', 'WindDir9am', 'WindDir3pm'], dtype='object')
# Impute categorical var with Mode
for col in ['Date', 'Location', 'WindGustDir', 'WindDir9am', 'WindDir3pm']:
    oversampled[col] = oversampled[col].fillna(oversampled[col].mode()[0])
# Convert categorical features to continuous features with Label Encoding
from sklearn.preprocessing import LabelEncoder
# Keep one fitted encoder per column so the mapping can be inverted later.
lencoders = {}
for col in oversampled.select_dtypes(include=['object']).columns:
    encoder = LabelEncoder()
    oversampled[col] = encoder.fit_transform(oversampled[col])
    lencoders[col] = encoder
# Duplicate of the label-encoding cell above. The original assigned into a
# misspelled dict name ("lencoderds"), which raises NameError whenever an
# object-typed column is still present; it only appeared to work because
# the previous cell had already encoded every object column, leaving this
# loop with nothing to iterate. Fixed to use the correct dict.
from sklearn.preprocessing import LabelEncoder
lencoders = {}
for col in oversampled.select_dtypes(include =["object"]).columns:
    lencoders[col] = LabelEncoder()
    oversampled[col] = lencoders[col].fit_transform(oversampled[col])
import warnings
warnings.filterwarnings("ignore")
# Multiple Imputation by Chained Equations
# enable_iterative_imputer must be imported first: IterativeImputer is
# still an experimental sklearn feature.
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
# Deep-copy so the oversampled frame itself is left untouched.
MiceImputed = oversampled.copy(deep=True)
mice_imputer = IterativeImputer()
# Fill every remaining NaN by iteratively regressing each feature on the others.
MiceImputed.iloc[:, :] = mice_imputer.fit_transform(oversampled)
<pandas.core.indexing._iLocIndexer at 0x1fd21891120>
# Detecting outliers with IQR
# Per-column interquartile range over the imputed dataset.
Q1 = MiceImputed.quantile(0.25)
Q3 = MiceImputed.quantile(0.75)
IQR = Q3 - Q1
print(IQR)
Date 1540.000000 Location 25.000000 MinTemp 9.000000 MaxTemp 10.000000 Rainfall 1.800000 Evaporation 1.669824 Sunshine 1.024853 WindGustDir 9.000000 WindGustSpeed 17.000000 WindDir9am 8.000000 WindDir3pm 8.000000 WindSpeed9am 12.000000 WindSpeed3pm 11.000000 Humidity9am 25.000000 Humidity3pm 29.000000 Pressure9am 8.300000 Pressure3pm 8.400000 Cloud9am 2.562811 Cloud3pm 2.496833 Temp9am 9.100000 Temp3pm 9.400000 RainToday 1.000000 RISK_MM 4.400000 RainTomorrow 1.000000 dtype: float64
# Removing outliers from the dataset
# Keep only rows where every column lies within [Q1 - 1.5*IQR, Q3 + 1.5*IQR].
MiceImputed = MiceImputed[~((MiceImputed < (Q1 - 1.5 * IQR)) |(MiceImputed > (Q3 + 1.5 * IQR))).any(axis=1)]
MiceImputed.shape
(89193, 24)
# Correlation Heatmap
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
corr = MiceImputed.corr()
# Mask the upper triangle so each pairwise correlation is drawn only once.
mask = np.triu(np.ones_like(corr, dtype=bool))
f, ax = plt.subplots(figsize=(20, 20))
cmap = sns.diverging_palette(250, 25, as_cmap=True)
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=None, center=0,square=True, annot=True, linewidths=.5, cbar_kws={"shrink": .9})
<Axes: >
# Pairwise scatter plots of temperature/pressure/evaporation features, coloured by the target.
sns.pairplot( data=MiceImputed, vars=('MaxTemp','MinTemp','Pressure9am','Pressure3pm', 'Temp9am', 'Temp3pm', 'Evaporation'), hue='RainTomorrow' )
<seaborn.axisgrid.PairGrid at 0x1fd0b9edc30>
# Standardizing data
# Rescale every column to [0, 1] with min-max normalisation; the chi-square
# feature selection below requires non-negative inputs.
from sklearn import preprocessing
r_scaler = preprocessing.MinMaxScaler()
scaled_values = r_scaler.fit_transform(MiceImputed)
modified_data = pd.DataFrame(scaled_values, index=MiceImputed.index, columns=MiceImputed.columns)
# Feature Importance using Filter Method (Chi-Square)
from sklearn.feature_selection import SelectKBest, chi2
X = modified_data.loc[:,modified_data.columns!='RainTomorrow']
y = modified_data[['RainTomorrow']]
# Score each feature against the target and keep the 10 best.
selector = SelectKBest(chi2, k=10)
selector.fit(X, y)
X_new = selector.transform(X)
# Print the names of the selected columns.
print(X.columns[selector.get_support(indices=True)])
Index(['Rainfall', 'WindGustSpeed', 'Humidity9am', 'Humidity3pm',
'Pressure9am', 'Pressure3pm', 'Cloud3pm', 'Temp3pm', 'RainToday',
'RISK_MM'],
dtype='object')
# Feature importance using an embedded method: a Random Forest wrapped in
# SelectFromModel keeps features whose importance exceeds the default
# (mean-importance) threshold.
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestClassifier as rf
X = MiceImputed.drop('RainTomorrow', axis=1)
y = MiceImputed['RainTomorrow']
selector = SelectFromModel(rf(n_estimators=100, random_state=0))
selector.fit(X, y)
support = selector.get_support()
features = X.loc[:,support].columns.tolist()
print(features)
# Reuse the forest already fitted inside the selector instead of training a
# second identical model (same data, same random_state) just to print its
# feature importances.
print(selector.estimator_.feature_importances_)
['RISK_MM'] [0.00361265 0.00303465 0.00515607 0.00533627 0.01364907 0.00146548 0.00254283 0.00249897 0.00957051 0.00263579 0.00331491 0.00245111 0.0030674 0.00845189 0.04282585 0.01233348 0.01308554 0.00257482 0.00905154 0.0042079 0.00755055 0.00416249 0.8374202 ]
# Feature matrix for modelling: all predictors except Date, RISK_MM and the
# target itself.
features = MiceImputed[['Location', 'MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine', 'WindGustDir',
                        'WindGustSpeed', 'WindDir9am', 'WindDir3pm', 'WindSpeed9am', 'WindSpeed3pm', 'Humidity9am',
                        'Humidity3pm', 'Pressure9am', 'Pressure3pm', 'Cloud9am', 'Cloud3pm', 'Temp9am', 'Temp3pm',
                        'RainToday']]
target = MiceImputed['RainTomorrow']
# Split into test and train
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.25, random_state=12345)
# Normalize Features
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Fit the scaler on the training data only, then apply the SAME fitted
# statistics to the test set. The original called fit_transform on X_test,
# which re-estimated mean/std from the test data — inconsistent scaling
# between the two sets and a form of information leakage.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
def plot_roc_cur(fper, tper):
    """Plot a ROC curve (orange) against the random-chance diagonal.

    fper/tper: arrays of false / true positive rates, as produced by
    sklearn.metrics.roc_curve in run_model below.
    """
    plt.plot(fper, tper, color='orange', label='ROC')
    # Dashed diagonal = performance of a random classifier.
    plt.plot([0, 1], [0, 1], color='darkblue', linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend()
    plt.show()
import time
from sklearn.metrics import accuracy_score, roc_auc_score, cohen_kappa_score, roc_curve, classification_report
from sklearn.metrics import ConfusionMatrixDisplay

def run_model(model, X_train, y_train, X_test, y_test, verbose=True):
    """Fit `model`, report test-set metrics, and plot ROC + confusion matrix.

    Prints accuracy, ROC-AUC, Cohen's kappa, wall-clock training+eval time
    and a classification report, then draws the ROC curve and an
    all-normalised confusion matrix.

    Returns (model, accuracy, roc_auc, cohen_kappa, time_taken_seconds).
    """
    t0 = time.time()
    # verbose=False forwards verbose=0 into fit() to silence per-iteration
    # training logs (used for the CatBoost run below); plain sklearn
    # estimators do not accept that keyword, hence the branch.
    if verbose == False:
        model.fit(X_train, y_train, verbose=0)
    else:
        model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred)
    coh_kap = cohen_kappa_score(y_test, y_pred)
    time_taken = time.time() - t0
    print("Accuracy = {}".format(accuracy))
    print("ROC Area under Curve = {}".format(roc_auc))
    print("Cohen's Kappa = {}".format(coh_kap))
    print("Time taken = {}".format(time_taken))
    print(classification_report(y_test, y_pred, digits=5))
    # ROC needs the positive-class probability, not the hard prediction.
    probs = model.predict_proba(X_test)
    probs = probs[:, 1]
    fper, tper, thresholds = roc_curve(y_test, probs)
    plot_roc_cur(fper, tper)
    # BUG FIX: plot_confusion_matrix was deprecated in scikit-learn 1.0 and
    # removed in 1.2, raising NameError here (see the traceback in the
    # notebook output). ConfusionMatrixDisplay.from_estimator is the
    # replacement API.
    ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, cmap=plt.cm.Blues, normalize='all')
    return model, accuracy, roc_auc, coh_kap, time_taken
# Logistic Regression
from sklearn.linear_model import LogisticRegression
# L1-penalised logistic regression; the liblinear solver supports l1.
params_lr = {'penalty': 'l1', 'solver':'liblinear'}
model_lr = LogisticRegression(**params_lr)
model_lr, accuracy_lr, roc_auc_lr, coh_kap_lr, tt_lr = run_model(model_lr, X_train, y_train, X_test, y_test)
# Decision Tree
from sklearn.tree import DecisionTreeClassifier
params_dt = {'max_depth': 16,
             'max_features': "sqrt"}
model_dt = DecisionTreeClassifier(**params_dt)
model_dt, accuracy_dt, roc_auc_dt, coh_kap_dt, tt_dt = run_model(model_dt, X_train, y_train, X_test, y_test)
# Neural Network
from sklearn.neural_network import MLPClassifier
# Three hidden layers of 30 logistic units, trained with lbfgs.
params_nn = {'hidden_layer_sizes': (30,30,30),
             'activation': 'logistic',
             'solver': 'lbfgs',
             'max_iter': 500}
model_nn = MLPClassifier(**params_nn)
model_nn, accuracy_nn, roc_auc_nn, coh_kap_nn, tt_nn = run_model(model_nn, X_train, y_train, X_test, y_test)
# Random Forest
from sklearn.ensemble import RandomForestClassifier
params_rf = {'max_depth': 16,
             'min_samples_leaf': 1,
             'min_samples_split': 2,
             'n_estimators': 100,
             'random_state': 12345}
model_rf = RandomForestClassifier(**params_rf)
model_rf, accuracy_rf, roc_auc_rf, coh_kap_rf, tt_rf = run_model(model_rf, X_train, y_train, X_test, y_test)
# Light GBM
import lightgbm as lgb
params_lgb ={'colsample_bytree': 0.95,
             'max_depth': 16,
             'min_split_gain': 0.1,
             'n_estimators': 200,
             'num_leaves': 50,
             'reg_alpha': 1.2,
             'reg_lambda': 1.2,
             'subsample': 0.95,
             'subsample_freq': 20}
model_lgb = lgb.LGBMClassifier(**params_lgb)
model_lgb, accuracy_lgb, roc_auc_lgb, coh_kap_lgb, tt_lgb = run_model(model_lgb, X_train, y_train, X_test, y_test)
# Catboost
!pip install catboost
import catboost as cb
params_cb ={'iterations': 50,
            'max_depth': 16}
model_cb = cb.CatBoostClassifier(**params_cb)
# verbose=False routes through run_model's fit(..., verbose=0) branch to
# silence CatBoost's per-iteration training log.
model_cb, accuracy_cb, roc_auc_cb, coh_kap_cb, tt_cb = run_model(model_cb, X_train, y_train, X_test, y_test, verbose=False)
# XGBoost
import xgboost as xgb
params_xgb ={'n_estimators': 500,
             'max_depth': 16}
model_xgb = xgb.XGBClassifier(**params_xgb)
model_xgb, accuracy_xgb, roc_auc_xgb, coh_kap_xgb, tt_xgb = run_model(model_xgb, X_train, y_train, X_test, y_test)
Accuracy = 0.7309296381003633
ROC Area under Curve = 0.7131732529423768
Cohen's Kappa = 0.43544238478007957
Time taken = 1.1369616985321045
precision recall f1-score support
0 0.74249 0.82234 0.78038 12963
1 0.71002 0.60401 0.65274 9336
accuracy 0.73093 22299
macro avg 0.72626 0.71317 0.71656 22299
weighted avg 0.72890 0.73093 0.72694 22299
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[59], line 7 4 params_lr = {'penalty': 'l1', 'solver':'liblinear'} 6 model_lr = LogisticRegression(**params_lr) ----> 7 model_lr, accuracy_lr, roc_auc_lr, coh_kap_lr, tt_lr = run_model(model_lr, X_train, y_train, X_test, y_test) 9 # Decision Tree 10 from sklearn.tree import DecisionTreeClassifier Cell In[58], line 25, in run_model(model, X_train, y_train, X_test, y_test, verbose) 22 fper, tper, thresholds = roc_curve(y_test, probs) 23 plot_roc_cur(fper, tper) ---> 25 plot_confusion_matrix(model, X_test, y_test,cmap=plt.cm.Blues, normalize = 'all') 27 return model, accuracy, roc_auc, coh_kap, time_taken NameError: name 'plot_confusion_matrix' is not defined
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import itertools
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
import lightgbm as lgb
import catboost as cb
import xgboost as xgb
from mlxtend.classifier import EnsembleVoteClassifier
from mlxtend.plotting import plot_decision_regions
# Filler value and range for the third feature (index 2, Cloud3pm) when the
# decision surface is projected onto two dimensions in the next cell.
value = 1.80
width = 0.90
# Fresh, mostly-default classifiers with a shared seed for comparability.
clf1 = LogisticRegression(random_state=12345)
clf2 = DecisionTreeClassifier(random_state=12345)
clf3 = MLPClassifier(random_state=12345, verbose = 0)
clf4 = RandomForestClassifier(random_state=12345)
clf5 = lgb.LGBMClassifier(random_state=12345, verbose = 0)
clf6 = cb.CatBoostClassifier(random_state=12345, verbose = 0)
clf7 = xgb.XGBClassifier(random_state=12345)
# Soft-voting ensemble of the four tree/boosting models, equally weighted.
eclf = EnsembleVoteClassifier(clfs=[clf4, clf5, clf6, clf7], weights=[1, 1, 1, 1], voting='soft')
X_list = MiceImputed[["Sunshine", "Humidity9am", "Cloud3pm"]] #took only really important features
# mlxtend's plotting utilities work on plain numpy arrays.
X = np.asarray(X_list, dtype=np.float32)
y_list = MiceImputed["RainTomorrow"]
y = np.asarray(y_list, dtype=np.int32)
# Plotting Decision Regions
# 3x3 grid: one subplot per classifier (8 models, one cell left empty).
gs = gridspec.GridSpec(3,3)
fig = plt.figure(figsize=(18, 14))
labels = ['Logistic Regression',
          'Decision Tree',
          'Neural Network',
          'Random Forest',
          'LightGBM',
          'CatBoost',
          'XGBoost',
          'Ensemble']
# Fit each classifier on the 3-feature subset and draw its 2-D decision
# surface; feature index 2 (Cloud3pm) is held near `value` via the filler
# arguments so the surface can be drawn in two dimensions.
for clf, lab, grd in zip([clf1, clf2, clf3, clf4, clf5, clf6, clf7, eclf],
                         labels,
                         itertools.product([0, 1, 2],
                         repeat=2)):
    clf.fit(X, y)
    ax = plt.subplot(gs[grd[0], grd[1]])
    fig = plot_decision_regions(X=X, y=y, clf=clf,
                                filler_feature_values={2: value},
                                filler_feature_ranges={2: width},
                                legend=2)
    plt.title(lab)
plt.show()
[LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001348 seconds. You can set `force_row_wise=true` to remove the overhead. And if memory is not enough, you can set `force_col_wise=true`. [LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000664 seconds. You can set `force_row_wise=true` to remove the overhead. And if memory is not enough, you can set `force_col_wise=true`.
pip install lightgbm
Collecting lightgbmNote: you may need to restart the kernel to use updated packages.
Downloading lightgbm-4.0.0-py3-none-win_amd64.whl (1.3 MB)
---------------------------------------- 1.3/1.3 MB 3.0 MB/s eta 0:00:00
Requirement already satisfied: numpy in d:\users\cheshi\anaconda4\lib\site-packages (from lightgbm) (1.25.0)
Requirement already satisfied: scipy in d:\users\cheshi\anaconda4\lib\site-packages (from lightgbm) (1.11.1)
Installing collected packages: lightgbm
Successfully installed lightgbm-4.0.0
pip install catboost
Collecting catboost
Downloading catboost-1.2-cp310-cp310-win_amd64.whl (101.0 MB)
-------------------------------------- 101.0/101.0 MB 3.9 MB/s eta 0:00:00
Collecting plotly
Downloading plotly-5.15.0-py2.py3-none-any.whl (15.5 MB)
---------------------------------------- 15.5/15.5 MB 2.4 MB/s eta 0:00:00
Requirement already satisfied: numpy>=1.16.0 in d:\users\cheshi\anaconda4\lib\site-packages (from catboost) (1.25.0)
Requirement already satisfied: six in d:\users\cheshi\anaconda4\lib\site-packages (from catboost) (1.16.0)
Requirement already satisfied: pandas>=0.24 in d:\users\cheshi\anaconda4\lib\site-packages (from catboost) (2.0.3)
Requirement already satisfied: matplotlib in d:\users\cheshi\anaconda4\lib\site-packages (from catboost) (3.7.2)
Requirement already satisfied: scipy in d:\users\cheshi\anaconda4\lib\site-packages (from catboost) (1.11.1)
Collecting graphviz
Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)
---------------------------------------- 47.0/47.0 kB 1.2 MB/s eta 0:00:00
Requirement already satisfied: tzdata>=2022.1 in d:\users\cheshi\anaconda4\lib\site-packages (from pandas>=0.24->catboost) (2023.3)
Requirement already satisfied: pytz>=2020.1 in d:\users\cheshi\anaconda4\lib\site-packages (from pandas>=0.24->catboost) (2022.7)
Requirement already satisfied: python-dateutil>=2.8.2 in d:\users\cheshi\anaconda4\lib\site-packages (from pandas>=0.24->catboost) (2.8.2)
Requirement already satisfied: cycler>=0.10 in d:\users\cheshi\anaconda4\lib\site-packages (from matplotlib->catboost) (0.11.0)
Requirement already satisfied: contourpy>=1.0.1 in d:\users\cheshi\anaconda4\lib\site-packages (from matplotlib->catboost) (1.1.0)
Requirement already satisfied: pillow>=6.2.0 in d:\users\cheshi\anaconda4\lib\site-packages (from matplotlib->catboost) (9.4.0)
Requirement already satisfied: kiwisolver>=1.0.1 in d:\users\cheshi\anaconda4\lib\site-packages (from matplotlib->catboost) (1.4.4)
Requirement already satisfied: packaging>=20.0 in d:\users\cheshi\anaconda4\lib\site-packages (from matplotlib->catboost) (23.0)
Requirement already satisfied: pyparsing<3.1,>=2.3.1 in d:\users\cheshi\anaconda4\lib\site-packages (from matplotlib->catboost) (3.0.9)
Requirement already satisfied: fonttools>=4.22.0 in d:\users\cheshi\anaconda4\lib\site-packages (from matplotlib->catboost) (4.41.0)
Collecting tenacity>=6.2.0
Using cached tenacity-8.2.2-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, graphviz, plotly, catboost
Successfully installed catboost-1.2 graphviz-0.20.1 plotly-5.15.0 tenacity-8.2.2
Note: you may need to restart the kernel to use updated packages.
pip install xgboost
Collecting xgboost
Downloading xgboost-1.7.6-py3-none-win_amd64.whl (70.9 MB)
---------------------------------------- 70.9/70.9 MB 3.6 MB/s eta 0:00:00
Requirement already satisfied: scipy in d:\users\cheshi\anaconda4\lib\site-packages (from xgboost) (1.11.1)
Requirement already satisfied: numpy in d:\users\cheshi\anaconda4\lib\site-packages (from xgboost) (1.25.0)
Installing collected packages: xgboost
Successfully installed xgboost-1.7.6
Note: you may need to restart the kernel to use updated packages.
pip install mlxtend
Collecting mlxtend
Downloading mlxtend-0.22.0-py2.py3-none-any.whl (1.4 MB)
---------------------------------------- 1.4/1.4 MB 6.6 MB/s eta 0:00:00
Requirement already satisfied: setuptools in d:\users\cheshi\anaconda4\lib\site-packages (from mlxtend) (65.6.3)
Requirement already satisfied: matplotlib>=3.0.0 in d:\users\cheshi\anaconda4\lib\site-packages (from mlxtend) (3.7.2)
Requirement already satisfied: joblib>=0.13.2 in d:\users\cheshi\anaconda4\lib\site-packages (from mlxtend) (1.3.1)
Requirement already satisfied: numpy>=1.16.2 in d:\users\cheshi\anaconda4\lib\site-packages (from mlxtend) (1.25.0)
Requirement already satisfied: scikit-learn>=1.0.2 in d:\users\cheshi\anaconda4\lib\site-packages (from mlxtend) (1.3.0)
Requirement already satisfied: pandas>=0.24.2 in d:\users\cheshi\anaconda4\lib\site-packages (from mlxtend) (2.0.3)
Requirement already satisfied: scipy>=1.2.1 in d:\users\cheshi\anaconda4\lib\site-packages (from mlxtend) (1.11.1)
Requirement already satisfied: kiwisolver>=1.0.1 in d:\users\cheshi\anaconda4\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (1.4.4)
Requirement already satisfied: fonttools>=4.22.0 in d:\users\cheshi\anaconda4\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (4.41.0)
Requirement already satisfied: pillow>=6.2.0 in d:\users\cheshi\anaconda4\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (9.4.0)
Requirement already satisfied: cycler>=0.10 in d:\users\cheshi\anaconda4\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (0.11.0)
Requirement already satisfied: packaging>=20.0 in d:\users\cheshi\anaconda4\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (23.0)
Requirement already satisfied: python-dateutil>=2.7 in d:\users\cheshi\anaconda4\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (2.8.2)
Requirement already satisfied: pyparsing<3.1,>=2.3.1 in d:\users\cheshi\anaconda4\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (3.0.9)
Requirement already satisfied: contourpy>=1.0.1 in d:\users\cheshi\anaconda4\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (1.1.0)
Requirement already satisfied: tzdata>=2022.1 in d:\users\cheshi\anaconda4\lib\site-packages (from pandas>=0.24.2->mlxtend) (2023.3)
Requirement already satisfied: pytz>=2020.1 in d:\users\cheshi\anaconda4\lib\site-packages (from pandas>=0.24.2->mlxtend) (2022.7)
Requirement already satisfied: threadpoolctl>=2.0.0 in d:\users\cheshi\anaconda4\lib\site-packages (from scikit-learn>=1.0.2->mlxtend) (3.1.0)
Requirement already satisfied: six>=1.5 in d:\users\cheshi\anaconda4\lib\site-packages (from python-dateutil>=2.7->matplotlib>=3.0.0->mlxtend) (1.16.0)
Installing collected packages: mlxtend
Successfully installed mlxtend-0.22.0
Note: you may need to restart the kernel to use updated packages.